This worksheet leans heavily on tidyverse packages.

Setup

Import data

cdl.table <- read_csv("input/TABLES/cdl_crops.csv")                                       # Crop names/attributes
## Parsed with column specification:
## cols(
##   value = col_integer(),
##   cdl_name = col_character(),
##   FAO_shortgroup = col_character(),
##   FAO_ident = col_character(),
##   FAO_group_title = col_integer(),
##   FAO_title = col_character(),
##   FAO_group = col_character()
## )
roi.table <- read_csv("input/SHAPEFILES/cnty24k09_poly/cnty24k09_poly_attributes.csv")    # County names/attributes
## Parsed with column specification:
## cols(
##   centroid_x = col_double(),
##   centroid_y = col_double(),
##   ID = col_integer(),
##   NAME_PCASE = col_character(),
##   NAME_UCASE = col_character(),
##   FMNAME_PC = col_character(),
##   FMNAME_UC = col_character(),
##   ABBREV = col_character(),
##   NUM = col_integer(),
##   ABCODE = col_character(),
##   FIPS = col_integer(),
##   ANSI = col_integer(),
##   ISLAND = col_character(),
##   Shape_Leng = col_double(),
##   Shape_Area = col_double(),
##   HR_NAME = col_character()
## )
ca.counties <- readOGR("input/SHAPEFILES/cnty24k09_poly/cnty24k09_poly_simple.shp")
## OGR data source with driver: ESRI Shapefile 
## Source: "input/SHAPEFILES/cnty24k09_poly/cnty24k09_poly_simple.shp", layer: "cnty24k09_poly_simple"
## with 58 features
## It has 12 fields
## Integer64 fields read as strings:  NUM
ca.hr <- readOGR("input/SHAPEFILES/dwr/dwr_hydrologic_regions.shp")
## OGR data source with driver: ESRI Shapefile 
## Source: "input/SHAPEFILES/dwr/dwr_hydrologic_regions.shp", layer: "dwr_hydrologic_regions"
## with 10 features
## It has 1 fields
wf.master.cyear <- readRDS("output/wfs/wf_total_cyr.rds")
wf.master.wyear <- readRDS("output/wfs/wf_total_wyr.rds")
yield.master <- read_csv("output/yields/NASS_summarized_bycounty.csv")
## Parsed with column specification:
## cols(
##   Year = col_integer(),
##   cdl.name = col_character(),
##   cdl.value = col_integer(),
##   roi.index = col_integer(),
##   County = col_character(),
##   prod.tons = col_integer(),
##   val.usd = col_integer(),
##   hvst.acres = col_integer(),
##   prod.tonne = col_double()
## )

Create some functions for commonly used summary statistics

Typially in a boxplot, outliers are marked as values that fall outside of 1.5 times the interquartile range avove and below the 25% and 75% quantiles. is_extreme() is a function that we define below, that applys the same criteria and returns TRUE if the value is a suspected outlier/extreme falue, and false otherwise.

is_extreme <- function(x) {
  return(x < (quantile(x, 0.25) - (1.5 * IQR(x)) ) | x > (quantile(x, 0.75) + (1.5 * IQR(x)) ))
}

# TMP: Remove problematic entries
wf.master.wyear <- wf.master.wyear[!(wf.master.wyear$cdl.name == "Oats" & wf.master.wyear$County == "Riverside" & wf.master.wyear$Year == 2009),]
#wf.master.cyear[!(wf.master.cyear$cdl.name == "Oats" & wf.master.cyear$County == "Riverside" & wf.master.cyear$Year == 2009),]

Figure: Average water demand by crop for all years

Calculate average statewide water demand for all years

Note that the we have to explicitly ungroup at the end, since things will fail in mysterious ways later on if we don’t remove the grouping metadata. I’m not sure what to think about the explicit ungroup().

wf.master.allcrop.cyear <- wf.master.cyear %>%
  group_by(cdl.value, cdl.name, Year) %>%
  summarize_at(vars(cwr, ppt, et.b, et.g, wf.b, wf.g), funs(sum(., na.rm = TRUE))) %>%
  group_by(cdl.value, cdl.name) %>%
  summarize_at(vars(cwr, ppt, et.b, et.g, wf.b, wf.g), funs(avg = "mean")) %>%
  left_join(cdl.table %>% select(value,`FAO_shortgroup`),
            by = c("cdl.value" = "value")) %>%
  ungroup()

wf.master.allcrop.wyear <- wf.master.wyear %>%
  group_by(cdl.value, cdl.name, Year) %>%
  summarize_at(vars(cwr, ppt, et.b, et.g, wf.b, wf.g), funs(sum(., na.rm = TRUE))) %>%
  group_by(cdl.value, cdl.name) %>%
  summarize_at(vars(cwr, ppt, et.b, et.g, wf.b, wf.g), funs(avg = "mean")) %>%
  left_join(cdl.table %>% select(value,`FAO_shortgroup`),
            by = c("cdl.value" = "value")) %>%
  ungroup()

Plot average Blue WF vs Green WF vs Crop Water Requirement, crop-wise

This segment is not evaluated because there are too many cdl.name classes. It would create unique color for every point, polluting the legend and being generally unreadable.

# Not evaluated
ggplot(data = wf.master.allcrop.cyear) +
  geom_point(mapping = aes(x = wf.b_avg, y = wf.g_avg))

If we color each point by the crop name, it would create unique color for every point, polluting the legend and being generally unreadable.

# Not evaluated
ggplot(data = wf.master.allcrop.cyear) +
  geom_point(mapping = aes(x = wf.b_avg, y = wf.g_avg, color = cdl.name))

It makes sense to group crops by similar classifications. This can be taxonomic (related to crops genotypic similarity), horticultural (related to the plant’s cultivation), economic (related to crops value or markets), or otherwise.

Here, let’s group by the FAO Indicative Crop Classification (ICC 1.1), which groups crops according to growing cycle (row vs perennial), crop genus, and product type. Note, that this classification is still in active development, as of 2017.

ggplot(data = wf.master.allcrop.cyear) +
  geom_point(mapping = aes(x = wf.b_avg, y = wf.g_avg, color = FAO_shortgroup))

We have some outlier crops (plural), so lets plot on a log scale just for perspective.

ggplot(data = wf.master.allcrop.cyear) +
  geom_point(mapping = aes(x = wf.b_avg, y = wf.g_avg, color = FAO_shortgroup)) +
  scale_x_log10() +
  scale_y_log10()

It looks like there may be some pattern? Let’s try a visual first inspection with convex hulls from ggalt::geom_encircle().

ggplot(data = wf.master.allcrop.cyear) +
  geom_point(mapping = aes(x = wf.b_avg, y = wf.g_avg, color = FAO_shortgroup)) +
  scale_x_log10() +
  scale_y_log10() +
  geom_encircle(mapping = aes(x = wf.b_avg, y = wf.g_avg, fill = FAO_shortgroup),alpha=0.3)

Still not particularly informative. There seems to be a strong 1:1 relationship between Blue WF and Green WF, which intuitively does not make very much sense. California grows lots of foods in regions that have nearly zero rainfall (Imperial county comes to mind.) I’d have to think about what is going on here.

Let’s take a look at overall crop water demand vs ppt.

ggplot(data = wf.master.allcrop.cyear) +
  geom_point(mapping = aes(x = cwr_avg, y = ppt_avg, color = FAO_shortgroup)) +
  scale_x_log10() +
  scale_y_log10() +
  geom_encircle(mapping = aes(x = cwr_avg, y = ppt_avg, fill = FAO_shortgroup),alpha=0.3)

Since Green WF is directly related to precipitation, and Blue WF is directly related to the crop water requirement we would expect the plots to look similar, and in fact they do look like affine transformations of each other.

What if we maintain the same groupings, but color (and size) by crop water requirement instead?

ggplot(data = wf.master.allcrop.cyear) +
  geom_point(mapping = aes(x = wf.b_avg, y = wf.g_avg, size = cwr_avg, color = cwr_avg)) +
  scale_x_log10() +
  scale_y_log10() +
  scale_color_viridis(option = "plasma")

Let’s try to identify some of these high values, using the criteria of points that fall outside of the upper and lower quartile, plus/minux 1.5 times the interquartile range (defined earlier, see is_extreme().

First, aggregrated by calendar years:

# param.labs <- c(
#   wf.b_avg = `"Average Blue WF ("m^3~ tonne^-1*")"`,
#   wf.g_avg = `"Average Green WF ("m^3~ tonne^-1*")"`)

param.labs <- c(
  wf.b_avg = "Average Blue WF",
  wf.g_avg = "Average Green WF")

wf.master.allcrop.cyear %>%
  mutate(outlier = ifelse(is_extreme(cwr_avg), cdl.name, NA)) %>%
  mutate(funs(factor), outlier) %>%
  ggplot() +
  geom_point(mapping = aes(x = wf.b_avg, y = wf.g_avg, size = cwr_avg, color = outlier)) +
  geom_label_repel(mapping = aes(x = wf.b_avg, y = wf.g_avg, label = outlier, fill = outlier), 
                   color = "white", segment.color = 'grey', box.padding = unit(0.35, "lines"), 
                   point.padding = unit(0.5, "lines"), na.rm = TRUE, nudge_x = -1)  +
  #geom_text(mapping = aes(x = wf.b_avg, y = wf.g_avg, label = outlier), na.rm = TRUE, check_overlap = TRUE, vjust="inward",hjust="inward")  +
  scale_x_continuous(trans = "log10", labels = scales::comma,
                     breaks = scales::trans_breaks("log10", function(x) 10^x)) +
  scale_y_continuous(trans = "log10", labels = scales::comma,
                     breaks = scales::trans_breaks("log10", function(x) 10^x)) +
  annotation_logticks() +
  labs(title = "Mean WF and CWR by crop for 2007 - 2015 calendar years",
       subtitle = "(log10 scale)",
       x = "Average Blue WF (m3/tonne)", y = "Average Green WF (m3/tonne)", 
       fill = "Extreme \nCrops \n(for CWR)", size = "Avg CWR \n(m3/yr)") +
  guides(color = "none") +
  theme(axis.text.y = element_text(angle = 90))

Next, aggregrated by water years:

param.labs <- c(
  wf.b_avg = "Average Blue WF",
  wf.g_avg = "Average Green WF")

wf.master.allcrop.wyear %>%
  mutate(outlier = ifelse(is_extreme(cwr_avg), cdl.name, NA)) %>%
  mutate(funs(factor), outlier) %>%
  ggplot() +
  geom_point(mapping = aes(x = wf.b_avg, y = wf.g_avg, size = cwr_avg, color = outlier)) +
  geom_label_repel(mapping = aes(x = wf.b_avg, y = wf.g_avg, label = outlier, fill = outlier), 
                   color = "white", segment.color = 'grey', box.padding = unit(0.35, "lines"), 
                   point.padding = unit(0.5, "lines"), na.rm = TRUE, nudge_x = -1)  +
  #geom_text(mapping = aes(x = wf.b_avg, y = wf.g_avg, label = outlier), na.rm = TRUE, check_overlap = TRUE, vjust="inward",hjust="inward")  +
  scale_x_continuous(trans = "log10", labels = scales::comma,
                     breaks = scales::trans_breaks("log10", function(x) 10^x)) +
  scale_y_continuous(trans = "log10", labels = scales::comma,
                     breaks = scales::trans_breaks("log10", function(x) 10^x)) +
  annotation_logticks() +
  labs(title = "Mean WF and CWR by crop for 2008 - 2015 water years",
       subtitle = "(log10 scale)",
       x = "Average Blue WF (m3/tonne)", y = "Average Green WF (m3/tonne)", 
       fill = "Extreme \nCrops \n(for CWR)", size = "Avg CWR \n(m3/yr)") +
  guides(color = "none") +
  theme(axis.text.y = element_text(angle = 90),)

In my opinion, the contrast between WF and CWU is most apparent when visualized with a treemap. We use the treemapify extension to ggplot2 to draw these treemaps. As of June 2017, treemapify is not found in CRAN, and must be installed through devtools from github ([link][https://github.com/wilkox/treemapify].

ggplot(data = wf.master.allcrop.wyear, 
       mapping = aes(area = cwr_avg, fill = wf.b_avg, label = cdl.name)) +
  geom_treemap() +
  geom_treemap_text(
    fontface = "italic",
    colour = "white",
    place = "centre",
    grow = TRUE) +
  scale_fill_viridis(name = "Blue WF", trans = "log", option = "viridis") +
  labs(title = "Mean Blue WF and CWR by crop for 2008 - 2015 water years",
       subtitle = "(CWU expressed as proportional areas, Blue WF expressed log10 fill scale)")

ggplot(data = wf.master.allcrop.wyear, 
       mapping = aes(area = cwr_avg, fill = wf.b_avg, 
                     label = cdl.name, subgroup = FAO_shortgroup)) +
  geom_treemap() +
  geom_treemap_subgroup_border() +
  geom_treemap_subgroup_text(place = "centre", grow = T, alpha = 0.5,
                             colour = "black", fontface = "italic", min.size = 0) +
  geom_treemap_text(colour = "white", place = "topleft", reflow = T) + 
  scale_fill_viridis(trans = "log", option = "viridis", labels = function(x) format(signif(x,1)/1, scientific = TRUE)) +
  labs(title = "Mean Blue WF and CWR by crop for 2008 - 2015 water years",
       subtitle = "(CWU expressed as proportional areas, Blue WF expressed log10 fill scale)",
       fill = bquote(atop("Blue WF", "("*m^3*"/tonne)"))) +
  theme(legend.text=element_text(size=10),
        legend.title=element_text(size=10))

And finally, we can tabulate the average CWU in acre-feet (AF) and thousand-acre-feet (TAF) for comparison to other tallies. Compared to Josué Medellín-Azuara, Jay Lund, Richard Howitt, 2015. Jobs per drop irrigating California crops. California WaterBlog., we underestimate crop ET by two orders of magnitude.

kable(wf.master.allcrop.wyear %>%
  select(cdl.name, cwr_avg) %>%
  mutate(AF = (cwr_avg * 0.0008107)) %>%
  mutate(TAF = (AF / 1000))
  )
cdl.name cwr_avg AF TAF
Corn 2.940342e+08 2.383735e+05 238.3734997
Cotton 1.164929e+07 9.444080e+03 9.4440796
Rice 3.630020e+07 2.942858e+04 29.4285756
Sorghum 3.366776e+05 2.729445e+02 0.2729445
Sunflower 1.519630e+06 1.231964e+03 1.2319640
Barley 7.458195e+06 6.046359e+03 6.0463590
Oats 3.629909e+07 2.942767e+04 29.4276708
Safflower 6.035132e+05 4.892681e+02 0.4892681
Alfalfa 2.369956e+09 1.921324e+06 1921.3235667
Other Hay/Non Alfalfa 1.053763e+09 8.542856e+05 854.2856486
Dry Beans 6.129875e+05 4.969490e+02 0.4969490
Potatoes 1.930627e+06 1.565159e+03 1.5651591
Misc Vegs & Fruits 2.683571e+07 2.175571e+04 21.7557063
Watermelons 2.631860e+06 2.133649e+03 2.1336491
Onions 4.783154e+06 3.877703e+03 3.8777028
Cucumbers 2.793742e+04 2.264887e+01 0.0226489
Peas 2.430546e+06 1.970443e+03 1.9704433
Tomatoes 1.005004e+08 8.147570e+04 81.4757010
Caneberries 4.196405e+05 3.402025e+02 0.3402025
Herbs 2.407502e+06 1.951762e+03 1.9517616
Cherries 6.349798e+07 5.147781e+04 51.4778144
Peaches 2.079860e+07 1.686142e+04 16.8614214
Apples 1.730969e+06 1.403297e+03 1.4032968
Grapes 1.176470e+09 9.537646e+05 953.7646164
Citrus 5.469024e+07 4.433738e+04 44.3373795
Almonds 3.347336e+08 2.713685e+05 271.3685125
Walnuts 5.356937e+08 4.342869e+05 434.2868843
Pears 3.505442e+06 2.841862e+03 2.8418616
Pistachios 3.729351e+06 3.023385e+03 3.0233851
Triticale 2.030540e+06 1.646159e+03 1.6461586
Carrots 2.261697e+06 1.833558e+03 1.8335578
Asparagus 3.534836e+04 2.865691e+01 0.0286569
Garlic 1.424974e+05 1.155226e+02 0.1155226
Cantaloupes 7.500758e+05 6.080865e+02 0.6080865
Olives 6.006235e+07 4.869254e+04 48.6925443
Oranges 4.580098e+06 3.713086e+03 3.7130855
Honeydew Melons 3.702208e+04 3.001380e+01 0.0300138
Broccoli 6.759166e+05 5.479656e+02 0.5479656
Peppers 2.762135e+06 2.239263e+03 2.2392630
Pomegranates 1.178250e+05 9.552072e+01 0.0955207
Nectarines 2.002918e+06 1.623765e+03 1.6237654
Greens 1.442341e+05 1.169305e+02 0.1169305
Plums 2.134729e+07 1.730625e+04 17.3062463
Strawberries 1.744838e+07 1.414540e+04 14.1454046
Squash 1.692833e+05 1.372380e+02 0.1372380
Apricots 1.313932e+06 1.065205e+03 1.0652048
Lettuce 2.111096e+05 1.711466e+02 0.1711466
Pumpkins 3.105419e+04 2.517563e+01 0.0251756
Cabbage 3.140193e+04 2.545754e+01 0.0254575

Figure: Average water demand by county for all years

Calculate average for all years

Note that it’s absolutely ridiculous that the we have to explicitly ungroup at the end, but things will fail in mysterious ways later on if we don’t remove the grouping metadata

wf.master.allcounty.cyear <- wf.master.cyear %>%
  group_by(roi.index, County, Year) %>%
  summarize_at(vars(cwr, ppt, et.b, et.g, wf.b, wf.g), funs(sum(., na.rm = TRUE))) %>%
  group_by(roi.index, County) %>%
  summarize_at(vars(cwr, ppt, et.b, et.g, wf.b, wf.g), funs(avg = "mean")) %>%
  left_join(roi.table %>% select(NUM,`HR_NAME`),
            by = c("roi.index" = "NUM")) %>%
  ungroup()

wf.master.allcounty.wyear <- wf.master.wyear %>%
  group_by(roi.index, County, Year) %>%
  summarize_at(vars(cwr, ppt, et.b, et.g, wf.b, wf.g), funs(sum(., na.rm = TRUE))) %>%
  group_by(roi.index, County) %>%
  summarize_at(vars(cwr, ppt, et.b, et.g, wf.b, wf.g), funs(avg = "mean")) %>%
  left_join(roi.table %>% select(NUM,`HR_NAME`),
            by = c("roi.index" = "NUM")) %>%
  ungroup()

Plot average Blue WF vs Green WF vs Crop Water Requirement, county-wise

# param.labs <- c(
#   wf.b_avg = `"Average Blue WF ("m^3~ tonne^-1*")"`,
#   wf.g_avg = `"Average Green WF ("m^3~ tonne^-1*")"`)

param.labs <- c(
  wf.b_avg = "Average Blue WF",
  wf.g_avg = "Average Green WF")

wf.master.allcounty.cyear %>%
  mutate(outlier = ifelse(is_extreme(cwr_avg), County, NA)) %>%
  mutate(funs(factor), outlier) %>%
  ggplot() +
  geom_point(mapping = aes(x = wf.b_avg, y = wf.g_avg, size = cwr_avg, color = outlier)) +
  geom_label_repel(mapping = aes(x = wf.b_avg, y = wf.g_avg, label = outlier, fill = outlier), 
                   color = "white", segment.color = 'grey', box.padding = unit(0.35, "lines"), 
                   point.padding = unit(0.5, "lines"), na.rm = TRUE, nudge_x = -1)  +
  #geom_text(mapping = aes(x = wf.b_avg, y = wf.g_avg, label = outlier), na.rm = TRUE, check_overlap = TRUE, vjust="inward",hjust="inward")  +
  scale_x_continuous(trans = "log10", labels = scales::comma,
                     breaks = scales::trans_breaks("log10", function(x) 10^x)) +
  scale_y_continuous(trans = "log10", labels = scales::comma,
                     breaks = scales::trans_breaks("log10", function(x) 10^x)) +
  annotation_logticks() +
  labs(title = "Mean WF and CWR by crop for 2007 - 2015 CALENDAR years",
       subtitle = "(log10 scale)",
       x = "Average Blue WF (m3/tonne)", y = "Average Green WF (m3/tonne)", 
       fill = "Extreme \nCounties \n(for CWR)", size = "Avg CWR \n(m3/yr)") +
  guides(color = "none") +
  theme(axis.text.y = element_text(angle = 90))

param.labs <- c(
  wf.b_avg = "Average Blue WF",
  wf.g_avg = "Average Green WF")

wf.master.allcounty.wyear %>%
  mutate(outlier = ifelse(is_extreme(cwr_avg), County, NA)) %>%
  mutate(funs(factor), outlier) %>%
  ggplot() +
  geom_point(mapping = aes(x = wf.b_avg, y = wf.g_avg, size = cwr_avg, color = outlier)) +
  geom_label_repel(mapping = aes(x = wf.b_avg, y = wf.g_avg, label = outlier, fill = outlier), 
                   color = "white", segment.color = 'grey', box.padding = unit(0.35, "lines"), 
                   point.padding = unit(0.5, "lines"), na.rm = TRUE, nudge_x = -1)  +
  #geom_text(mapping = aes(x = wf.b_avg, y = wf.g_avg, label = outlier), na.rm = TRUE, check_overlap = TRUE, vjust="inward",hjust="inward")  +
  scale_x_continuous(trans = "log10", labels = scales::comma,
                     breaks = scales::trans_breaks("log10", function(x) 10^x)) +
  scale_y_continuous(trans = "log10", labels = scales::comma,
                     breaks = scales::trans_breaks("log10", function(x) 10^x)) +
  annotation_logticks() +
  labs(title = "Mean WF and CWR by crop for 2008 - 2015 water years",
       subtitle = "(log10 scale)",
       x = "Average Blue WF (m3/tonne)", y = "Average Green WF (m3/tonne)", 
       fill = "Extreme \nCounties \n(for CWR)", size = "Avg CWR \n(m3/yr)") +
  guides(color = "none") +
  theme(axis.text.y = element_text(angle = 90))

param.labs <- c(
  wf.b_avg = "Average Blue WF",
  wf.g_avg = "Average Green WF")

# Note: Uses cowplot::get_legend and cowplot::plot_grid
local({
  p1 <- wf.master.allcounty.wyear %>%
    mutate(outlier = ifelse(is_extreme(cwr_avg), County, NA)) %>%
    mutate(funs(factor), outlier) %>%
    ggplot() +
    geom_point(mapping = aes(x = cwr_avg, y = ppt_avg, color = HR_NAME)) +
    geom_encircle(mapping = aes(x = cwr_avg, y = ppt_avg, fill = HR_NAME), alpha=0.3) +
    geom_label_repel(mapping = aes(x = cwr_avg, y = ppt_avg, label = outlier, fill = HR_NAME), 
                      color = "white", segment.color = 'grey', box.padding = unit(0.35, "lines"), 
                      point.padding = unit(0.5, "lines"), na.rm = TRUE, nudge_x = -1)  +
    scale_x_continuous(trans = "log10", labels = scales::comma,
                       breaks = scales::trans_breaks("log10", function(x) 10^x)) +
    scale_y_continuous(trans = "log10", labels = scales::comma,
                       breaks = scales::trans_breaks("log10", function(x) 10^x)) +
    annotation_logticks() +
    labs(title = "Mean PPT and CWU by county for 2008 - 2015 water years",
         subtitle = "Top CWU counties highlighted (log10 scale)",
         x = "Average county CWU (m3/yr)", y = "Average county PPT (m3/yr)") +
    guides(color = "none", fill = "none") +
    theme(axis.text.y = element_text(angle = 90))
  
  p2 <- wf.master.allcounty.wyear %>%
    mutate(outlier = ifelse(is_extreme(cwr_avg), County, NA)) %>%
    mutate(funs(factor), outlier) %>%
    ggplot() +
    geom_point(mapping = aes(x = wf.b_avg, y = wf.g_avg, size = cwr_avg, color = HR_NAME)) +
    geom_encircle(mapping = aes(x = wf.b_avg, y = wf.g_avg, fill = HR_NAME),alpha=0.3) + 
    # geom_label_repel(mapping = aes(x = wf.b_avg, y = wf.g_avg, label = outlier, fill = outlier), 
    #                  color = "white", segment.color = 'grey', box.padding = unit(0.35, "lines"), 
    #                  point.padding = unit(0.5, "lines"), na.rm = TRUE, nudge_x = -1)  +
    scale_x_continuous(trans = "log10", labels = scales::comma,
                       breaks = scales::trans_breaks("log10", function(x) 10^x)) +
    scale_y_continuous(trans = "log10", labels = scales::comma,
                       breaks = scales::trans_breaks("log10", function(x) 10^x)) +
    annotation_logticks() +
    labs(title = "Mean WF and CWR by county for 2008 - 2015 water years",
         subtitle = "(log10 scale)",
         x = "Average Blue WF (m3/tonne)", y = "Average Green WF (m3/tonne)", 
         fill = "Hydrologic \nRegion", size = "Avg CWR \n(m3/yr)") +
    guides(color = "none", fill = "none") +
    theme(axis.text.y = element_text(angle = 90), 
          legend.justification = c(1, .1), legend.position = c(1, .1),
          legend.background = element_rect(fill="transparent"))

  l1 <- get_legend(p2 + guides(size = "none", fill = guide_legend()) + theme(legend.position="bottom"))
  plot_grid(arrangeGrob(p1, p2, ncol=2), l1, ncol = 1, rel_heights = c(1, .2))
  #grid.arrange(arrangeGrob(p1, p2, ncol=2), l1, nrow = 2)
})

Finally, we can use the same treemap visualization, grouped by counties and hydrological regions, instead of crops and commodity groups.

ggplot(data = wf.master.allcounty.wyear, 
       mapping = aes(area = cwr_avg, fill = wf.b_avg, label = County)) +
  geom_treemap() +
  geom_treemap_text(
    fontface = "italic",
    colour = "white",
    place = "centre",
    grow = TRUE) +
  scale_fill_viridis(name = "Blue WF", trans = "log", option = "viridis") +
  labs(title = "Mean Blue WF and CWR by county for 2008 - 2015 water years",
       subtitle = "(CWU expressed as proportional areas, Blue WF expressed log10 fill scale)")

ggplot(data = wf.master.allcounty.wyear, 
       mapping = aes(area = cwr_avg, fill = wf.b_avg, 
                     label = County, subgroup = HR_NAME)) +
  geom_treemap() +
  geom_treemap_subgroup_border() +
  geom_treemap_subgroup_text(place = "centre", grow = T, alpha = 0.5,
                             colour = "black", fontface = "italic", min.size = 0) +
  geom_treemap_text(colour = "white", place = "topleft", reflow = T) + 
  scale_fill_viridis(name = "Blue WF", trans = "log", option = "viridis") +
  labs(title = "Mean Blue WF and CWR by county for 2008 - 2015 water years",
       subtitle = "(CWU expressed as proportional areas, Blue WF expressed log10 fill scale)")

Calculate average for all years

Note that it’s absolutely ridiculous that the we have to explicitly ungroup at the end, but things will fail in mysterious ways later on if we don’t remove the grouping metadata

wf.sum.allcrop.cyear <- wf.master.cyear %>%
  group_by(cdl.value, cdl.name, Year) %>%
  summarize_at(vars(cwr, ppt, et.b, et.g, wf.b, wf.g), funs(avg = sum(., na.rm = TRUE))) %>%
  left_join(cdl.table %>% select(value,`FAO_shortgroup`),
            by = c("cdl.value" = "value")) %>%
  ungroup()

wf.sum.allcrop.wyear <- wf.master.wyear %>%
  group_by(cdl.value, cdl.name, Year) %>%
  summarize_at(vars(cwr, ppt, et.b, et.g, wf.b, wf.g), funs(avg = sum(., na.rm = TRUE))) %>%
  left_join(cdl.table %>% 
              select(value,`FAO_shortgroup`),
            by = c("cdl.value" = "value")) %>%
  ungroup()

And finally, we can tabulate the average CWU in acre-feet (AF) and thousand-acre-feet (TAF) for comparison to other tallies. Compared to Josué Medellín-Azuara, Jay Lund, Richard Howitt, 2015. Jobs per drop irrigating California crops. California WaterBlog.,

options("scipen"=100, "digits"=4)
cwu.sum <- wf.sum.allcrop.wyear %>%
  filter(Year == 2010) %>%
  select(cdl.name, cwr_avg) %>%
  mutate(AF = (cwr_avg * 0.0008107)) %>%
  mutate(TAF = (AF / 1000))
sum(cwu.sum[["TAF"]])
## [1] 4968
kable(cwu.sum)
cdl.name cwr_avg AF TAF
Corn 284854902.8 230931.8697 230.9319
Cotton 14114778.0 11442.8505 11.4429
Rice 35955949.7 29149.4884 29.1495
Sorghum 140983.8 114.2955 0.1143
Sunflower 14596.8 11.8336 0.0118
Barley 17793176.5 14424.9282 14.4249
Oats 44708901.4 36245.5063 36.2455
Safflower 929409.3 753.4722 0.7535
Alfalfa 2422196289.6 1963674.5320 1963.6745
Other Hay/Non Alfalfa 1473414022.4 1194496.7479 1194.4967
Dry Beans 149861.1 121.4924 0.1215
Potatoes 9383.6 7.6073 0.0076
Misc Vegs & Fruits 1144724.0 928.0278 0.9280
Watermelons 79286.9 64.2779 0.0643
Onions 52848.8 42.8445 0.0428
Peas 959080.7 777.5267 0.7775
Tomatoes 38754409.6 31418.1999 31.4182
Cherries 55044713.9 44624.7496 44.6247
Peaches 21061136.0 17074.2629 17.0743
Apples 1106995.5 897.4413 0.8974
Grapes 772090884.0 625934.0797 625.9341
Citrus 18944888.3 15358.6209 15.3586
Almonds 259740048.7 210571.2575 210.5713
Walnuts 502868686.9 407675.6445 407.6756
Pistachios 17649547.0 14308.4877 14.3085
Triticale 6456404.8 5234.2074 5.2342
Garlic 75210.7 60.9733 0.0610
Cantaloupes 1655666.6 1342.2489 1.3422
Olives 107340939.7 87021.2998 87.0213
Oranges 4616479.5 3742.5799 3.7426
Peppers 1621.8 1.3148 0.0013
Plums 510914.8 414.1986 0.4142
Strawberries 21630945.8 17536.2078 17.5362
Squash 269.1 0.2181 0.0002
Apricots 1926055.7 1561.4534 1.5615
#freq_2010 <- freq(raster(lc.table[["abs_path"]][4]))

Let’s make some maps

ca.counties.tidy <- tidy(ca.counties) %>%
  mutate(id = as.numeric(id)) %>%
  left_join(roi.table %>% select(ID,`NUM`,`HR_NAME`),
            by = c("id" = "ID")) %>%
  left_join(wf.master.allcounty.wyear %>% select(`roi.index`,`cwr_avg`),
            by = c("NUM" = "roi.index"))
## Regions defined for each Polygons
ggplot(ca.counties.tidy) +
  geom_polygon(mapping = aes(x = long, y = lat, group = group, fill = cwr_avg)) +
  scale_fill_viridis(trans = "log", option = "viridis", labels = function(x) format(signif(x,1)/1, scientific = TRUE)) +
  geom_path(mapping = aes(x = long, y = lat, group = group), color="white") +
  coord_equal() +
  labs(title = "Mean annual CWR by county, 2008 - 2015 water years",
       subtitle = "(log10 scale)",
       x = NULL, y = NULL, 
       fill = bquote("CWU"~"("*m^3*")")) +
  theme(axis.text = element_blank(), 
        axis.ticks = element_blank(),
        line = element_blank(),
        rect = element_blank(),
        legend.text=element_text(size=10),
        legend.title=element_text(size=10))

ca.hr.tidy <- tidy(ca.hr) %>%
  left_join(rownames_to_column(ca.hr@data), 
            by = c("id" = "rowname")) %>%
  left_join(wf.master.allcounty.wyear %>%
              group_by(`HR_NAME`) %>%
              summarize_at(vars(cwr_avg, ppt_avg, et.b_avg, et.g_avg, wf.b_avg, wf.g_avg), funs(sum(., na.rm = TRUE))),
            by = c("HR_NAME" = "HR_NAME"))
## Regions defined for each Polygons
## Warning: Column `HR_NAME` joining factor and character vector, coercing
## into character vector
ggplot(ca.hr.tidy) +
  geom_polygon(mapping = aes(x = long, y = lat, group = group, fill = cwr_avg)) +
  scale_fill_viridis(trans = "log", option = "viridis", labels = function(x) format(signif(x,1)/1, scientific = TRUE)) +
  geom_path(mapping = aes(x = long, y = lat, group = group), color="white") +
  coord_equal() +
  labs(title = "Mean annual CWR by DWR hydrologic region, 2008 - 2015 water years",
       subtitle = "(log10 scale)",
       x = NULL, y = NULL, 
       fill = bquote("CWU"~"("*m^3*")")) +
  theme(axis.text = element_blank(), 
        axis.ticks = element_blank(),
        line = element_blank(),
        rect = element_blank(),
        legend.text=element_text(size=10),
        legend.title=element_text(size=10))